library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)


options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Load social distancing data

Load the Safegraph social distancing data

# bay_blockgroups <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_blockgroups.rds")

bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date())

# obtaining weekends
weekends <- bay_sd %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(weekend = ifelse((date %>% as.numeric()) %% 7 %in% c(2,3), T, F)) %>% 
  dplyr::select(date,weekend)

bay_sd <- bay_sd %>% left_join(weekends)

# date of the shelter in place order
shelter_start <- "2020-03-16" %>% as.Date()

# store an average of percent of devices completely at home since the shelter in place order started on weekdays
bay_sd_at_home_average <- bay_sd %>% 
  filter(weekend == F) %>% 
  filter(date > shelter_start) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% Not Completely at Home` = (100 - `% Completely at Home`))

# store average of percent of devices completely at home for January and February on weekdays
bay_pre_sd_at_home_average <- bay_sd %>% 
  filter(weekend == F) %>% 
  filter(date <  as.Date("2020-03-01")) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home Pre Shelter` = (completely_home_device_count/device_count*100) %>% round(1), `% Not Completely at Home Pre Shelter` = (100 - `% Completely at Home Pre Shelter`))

bay_sd_at_home_average <- bay_sd_at_home_average %>% left_join(bay_pre_sd_at_home_average %>% dplyr::select(origin_census_block_group, `% Completely at Home Pre Shelter`, `% Not Completely at Home Pre Shelter`))

I next obtain various demographic data and plot them against social distancing behavior, and examine for correlations.

# obtain the saved census data 
setwd("~/Documents/2020 Spring Quarter/CEE 218Z")
acs_vars = readRDS("censusData2018_acs_acs5.rds")
setwd("~/Documents/2020 Spring Quarter/CEE 218Z/covid19")

# get FIPS codes for CA counties
bay_area_counties <- lapply(fips("CA", c("Alameda", "Contra Costa", "Marin", "Napa", "San Francisco", "San Mateo", "Santa Clara", "Solano", "Sonoma")), function(x) substr(x,3,5))

# define a function for pulling census data
pullCensus <- function(variableToPull, counties) {
  censusData <- NULL
  for (i in 1:length(counties)) {
    county <- counties[i]
    regionString <- paste0("state:06+county:", county)
    censusDataCounty <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
    censusData <- rbind(censusData, censusDataCounty)
  }
  
  return(censusData)
}

Income

# load in income data - code adapted from other students
bay_median_income_by_block <-
  pullCensus("B19013_001E", bay_area_counties) %>% 
  filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
  rename(
    Median_Income = B19013_001E 
  ) %>% 
  filter(!is.na(Median_Income)) %>% 
  left_join(bay_sd_at_home_average, by = c("blockgroup" = "origin_census_block_group")) %>% 
  filter(!is.na(device_count)) 

bay_ami_by_block <-
  pullCensus("group(B19001)", bay_area_counties) %>%
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
  group_by(blockgroup) %>% 
  summarize(
    Total = B19001_001E,
    `Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
    #sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
    `Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E),
    `Under 125,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E),
    `Under 150,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E, B19001_015E)
  ) %>% 
  mutate(
    `% under 75,000` = `Under 75,000` / Total * 100,
    `% over 75,000` = (100 - `% under 75,000`),
    `% under 100,000` = `Under 100,000` / Total * 100,
    `% over 100,000` = (100 - `% under 100,000`),
    `% under 125,000` = `Under 125,000` / Total * 100,
    `% over 125,000` = (100 - `% under 125,000`),
    `% under 150,000` = `Under 150,000` / Total * 100,
    `% over 150,000` = (100 - `% under 150,000`),
  ) %>% 
  left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)
  ) %>% 
  filter(!is.na(device_count))

# plotting
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $75,000"
  )

income_75_model <- lm(`% Not Completely at Home` ~ `% over 75,000`, bay_ami_by_block)
summary(income_75_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 75,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.905  -5.551  -0.528   4.965  42.829 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     62.686852   0.407889  153.69   <2e-16 ***
## `% over 75,000` -0.153156   0.006411  -23.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.52 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1077, Adjusted R-squared:  0.1075 
## F-statistic: 570.7 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $100,000"
  )

income_100_model <- lm(`% Not Completely at Home` ~ `% over 100,000`, bay_ami_by_block)
summary(income_100_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 100,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.799  -5.575  -0.628   4.788  44.767 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      61.086199   0.315331  193.72   <2e-16 ***
## `% over 100,000` -0.156528   0.005921  -26.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.419 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1288, Adjusted R-squared:  0.1286 
## F-statistic:   699 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $125000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $125,000"
  )

income_125_model <- lm(`% Not Completely at Home` ~ `% over 125,000`, bay_ami_by_block)
summary(income_125_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 125,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.719  -5.535  -0.563   4.693  46.825 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      59.88233    0.26223  228.36   <2e-16 ***
## `% over 125,000` -0.16507    0.00592  -27.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.358 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1412, Adjusted R-squared:  0.141 
## F-statistic: 777.5 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $150000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 150,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $150,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $150,000"
  )

income_150_model <- lm(`% Not Completely at Home` ~ `% over 150,000`, bay_ami_by_block)
summary(income_150_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 150,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.131  -5.471  -0.559   4.771  45.293 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      58.746787   0.229335   256.2   <2e-16 ***
## `% over 150,000` -0.170958   0.006217   -27.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.375 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1379, Adjusted R-squared:  0.1377 
## F-statistic: 756.1 on 1 and 4728 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Households Above $75,000 Pre Shelter-in-Place"
  )

income_75_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 75,000`, bay_ami_by_block)
summary(income_75_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 75,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.095  -2.942   0.245   3.290  20.435 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     70.965396   0.239540  296.26   <2e-16 ***
## `% over 75,000`  0.113036   0.003765   30.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.003 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1601, Adjusted R-squared:  0.1599 
## F-statistic: 901.3 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Households Above $100,000 Pre Shelter-in-Place"
  )

income_100_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 100,000`, bay_ami_by_block)
summary(income_100_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 100,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.2334  -2.9109   0.3057   3.3078  18.9262 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      72.473764   0.186304  389.01   <2e-16 ***
## `% over 100,000`  0.108863   0.003498   31.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.974 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:   0.17,  Adjusted R-squared:  0.1699 
## F-statistic: 968.6 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $125000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $125,000 Pre Shelter-in-Place"
  )

income_125_model <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 125,000`, bay_ami_by_block)
summary(income_125_model)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 125,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -31.3512  -2.8027   0.3572   3.2505  17.9572 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      73.442753   0.155512  472.26   <2e-16 ***
## `% over 125,000`  0.111451   0.003511   31.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.957 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1757, Adjusted R-squared:  0.1755 
## F-statistic:  1008 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $150000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 150,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $150,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $150,000 Pre Shelter-in-Place"
  )

income_150_model <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 150,000`, bay_ami_by_block)
summary(income_150_model)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 150,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.373  -2.878   0.360   3.302  17.222 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      74.178083   0.135838  546.08   <2e-16 ***
## `% over 150,000`  0.116426   0.003683   31.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.96 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1745, Adjusted R-squared:  0.1743 
## F-statistic: 999.5 on 1 and 4728 DF,  p-value: < 2.2e-16

Language

# loading in language data - code adapted from other students
bay_lang_by_block <-
  pullCensus("group(B16004)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  left_join(acs_vars, by = c("variable" = "name")) %>% 
  mutate(
    tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
  ) %>% 
  filter(tier %in% c('Speak English "not well"', 
                     'Speak English "not at all"', 
                     'Total', 'Speak Spanish', 
                     'Speak Asian and Pacific Island languages')) %>% 
  group_by(blockgroup, tier) %>% 
  summarise(
    estimate1 = sum(estimate)
  ) %>% 
  spread(
    key = "tier",
    value = "estimate1"
  ) %>% 
  mutate(
    `% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
    `% speaking english > well` = (100 - `% speaking english < well`),
    `% speaking spanish` = (`Speak Spanish`/ Total) * 100,
    `% not speaking spanish` = (100 - `% speaking spanish`),
    `% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
  ) %>% 
  left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>% 
  filter(!is.na(device_count)) %>% 
  mutate(log_perc = log(`% speaking english < well`))

# plotting
bay_lang_by_block %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and English Language Ability"
  )

english_ability_model <- lm(`% Not Completely at Home` ~ `% speaking english > well`, bay_lang_by_block)
summary(english_ability_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% speaking english > well`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.907  -5.878  -0.330   5.561  40.918 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 54.156680   1.438832  37.639   <2e-16 ***
## `% speaking english > well` -0.007796   0.015520  -0.502    0.615    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.068 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  5.33e-05,   Adjusted R-squared:  -0.0001579 
## F-statistic: 0.2523 on 1 and 4734 DF,  p-value: 0.6155
bay_lang_by_block %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Spanish Language Ability"
  )

spanish_speaking_model <- lm(`% Not Completely at Home` ~ `% not speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% not speaking spanish`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.653  -5.692  -0.521   5.032  41.760 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              63.418350   0.628664  100.88   <2e-16 ***
## `% not speaking spanish` -0.118836   0.007327  -16.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.826 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.05264,    Adjusted R-squared:  0.05244 
## F-statistic:   263 on 1 and 4734 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_lang_by_block %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and English Language Ability Pre Shelter-in-Place"
  )

english_ability_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% speaking english > well`, bay_lang_by_block)
summary(english_ability_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% speaking english > well`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.866  -3.136   0.362   3.676  14.914 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 61.829284   0.837774   73.80   <2e-16 ***
## `% speaking english > well`  0.173148   0.009037   19.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.279 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.07199,    Adjusted R-squared:  0.0718 
## F-statistic: 367.1 on 1 and 4732 DF,  p-value: < 2.2e-16
bay_lang_by_block %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Spanish Language Ability Pre Shelter-in-Place"
  )

spanish_speaking_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% not speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% not speaking spanish`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.848  -3.159   0.413   3.603  13.869 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              70.818907   0.376268  188.21   <2e-16 ***
## `% not speaking spanish`  0.083275   0.004386   18.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.283 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.0708, Adjusted R-squared:  0.07061 
## F-statistic: 360.6 on 1 and 4732 DF,  p-value: < 2.2e-16

Age

# loading in age data - specifically looking at percentage 65+ and percentage <30
bay_age_by_block <- 
  pullCensus("group(B01001)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  mutate(
    label = acs_vars$label[match(variable,acs_vars$name)]
  ) %>% 
  select(-variable) %>% 
  separate(
    label,
    into = c(NA,NA,"sex","age"),
    sep = "!!"
  ) %>% filter(!is.na(age)) %>% 
  mutate(elderly = ifelse(age %in% c("65 and 66 years", "67 to 69 years", "70 to 74 years", "75 to 79 years", "80 to 84 years", "85 years and over"), estimate, NA), `less than 30` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years", "18 and 19 years", "20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA)) %>% 
  group_by(blockgroup) %>% 
  summarize(elderly = sum(elderly, na.rm = T), `less than 30` = sum(`less than 30`, na.rm = T), total = sum(estimate, na.rm = T)) %>% 
  mutate(`percent elderly` = elderly*100 / total, `percent less than 30` = `less than 30`*100 / total, `percent nonelderly` = (100 - `percent elderly`)) %>% 
  left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>% 
  filter(!is.na(device_count)) 

# plotting
bay_age_by_block %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Young Age Groups"
  )

young_model <- lm(bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 30`)
summary(young_model)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.460  -5.764  -0.312   5.371  37.944 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             49.10671    0.46082 106.564   <2e-16
## bay_age_by_block$`percent less than 30`  0.12215    0.01247   9.798   <2e-16
##                                            
## (Intercept)                             ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.978 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.01987,    Adjusted R-squared:  0.01967 
## F-statistic: 95.99 on 1 and 4734 DF,  p-value: < 2.2e-16
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Elderly Population"
  )

elderly_model <- lm(`% Not Completely at Home` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent elderly`, 
##     data = bay_age_by_block %>% filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.586  -5.830  -0.356   5.557  40.088 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       54.24498    0.28023  193.58  < 2e-16 ***
## `percent elderly` -0.05550    0.01618   -3.43 0.000609 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.036 on 4693 degrees of freedom
## Multiple R-squared:  0.002501,   Adjusted R-squared:  0.002288 
## F-statistic: 11.76 on 1 and 4693 DF,  p-value: 0.0006089

Compare to pre-shelter-in-place behavior:

bay_age_by_block %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Young Age Groups Pre Shelter-in-Place"
  )

young_model2 <- lm(bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ bay_age_by_block$`percent less than 30`)
summary(young_model2)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ 
##     bay_age_by_block$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.649  -3.332   0.283   3.749  15.296 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             78.773529   0.281877 279.460  < 2e-16
## bay_age_by_block$`percent less than 30` -0.027096   0.007631  -3.551 0.000388
##                                            
## (Intercept)                             ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.473 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.002657,   Adjusted R-squared:  0.002447 
## F-statistic: 12.61 on 1 and 4732 DF,  p-value: 0.0003878
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Elderly Population Pre Shelter-in-Place"
  )

elderly_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent elderly`, 
##     data = bay_age_by_block %>% filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.714  -3.316   0.331   3.681  14.810 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       76.546794   0.167597 456.730   <2e-16 ***
## `percent elderly`  0.085606   0.009675   8.848   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.399 on 4691 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01642,    Adjusted R-squared:  0.01621 
## F-statistic: 78.29 on 1 and 4691 DF,  p-value: < 2.2e-16

Vehicles available

# also get data on vehicles available as households without a vehicle
bay_no_vehicles_by_block <- pullCensus("group(B25044)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>%
  separate(label, into = c(NA, NA, NA,"vehicles"), sep = "!!") %>% 
  filter(!is.na(vehicles)) %>%
  group_by(blockgroup, vehicles) %>%
  summarize(grouped_vehicles = sum(estimate)) %>%
  spread(key = vehicles, value = grouped_vehicles) %>%
  mutate(total_nums = `1 vehicle available` + `2 vehicles available` + `3 vehicles available` + `4 vehicles available` + `5 or more vehicles available` + `No vehicle available`, `percent no vehicles` = `No vehicle available`*100 / total_nums, `percent with vehicles` = (100-`percent no vehicles`)) %>%
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count))

# plotting
bay_no_vehicles_by_block %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Vehicle Availability"
  )

vehicles_model <- lm(`% Not Completely at Home` ~ `percent with vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent with vehicles`, 
##     data = bay_no_vehicles_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.539  -5.918  -0.297   5.597  41.035 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             57.35685    0.96724  59.299  < 2e-16 ***
## `percent with vehicles` -0.04318    0.01047  -4.125 3.76e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.003 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.003587,   Adjusted R-squared:  0.003376 
## F-statistic: 17.02 on 1 and 4728 DF,  p-value: 3.763e-05

Compare to pre-shelter-in-place behavior:

bay_no_vehicles_by_block %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Vehicle Availability Pre Shelter-in-Place"
  )

vehicles_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent with vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent with vehicles`, 
##     data = bay_no_vehicles_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.3199  -3.2767   0.2145   3.5354  22.1954 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             66.925407   0.564331  118.59   <2e-16 ***
## `percent with vehicles`  0.118945   0.006106   19.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.253 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.07429,    Adjusted R-squared:  0.0741 
## F-statistic: 379.5 on 1 and 4728 DF,  p-value: < 2.2e-16

Occupants per room

# get data on occupants per room
bay_occupants_per_room_by_block <- pullCensus("group(B25014)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>% 
  filter(!is.na(`occupants per room`)) %>%
  group_by(blockgroup, `occupants per room`) %>%
  summarize(estimate_tot = sum(estimate)) %>% 
  spread(key = `occupants per room`, value = estimate_tot) %>%
  mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`, `percent 1 or more` = (`1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) * 100/ total_nums, `percent less than 1` = (100-`percent 1 or more`)) %>%
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# plotting
bay_occupants_per_room_by_block %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Room Occupancy"
  )

occupants_model <- lm(`% Not Completely at Home` ~ `percent less than 1`, bay_occupants_per_room_by_block)
summary(occupants_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent less than 1`, 
##     data = bay_occupants_per_room_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.306  -5.793  -0.317   5.481  41.080 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           61.70399    1.43003  43.149  < 2e-16 ***
## `percent less than 1` -0.08898    0.01526  -5.829 5.95e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.987 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.007135,   Adjusted R-squared:  0.006925 
## F-statistic: 33.98 on 1 and 4728 DF,  p-value: 5.946e-09

Compare to pre-shelter-in-place behavior:

bay_occupants_per_room_by_block %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Room Occupancy Pre Shelter-in-Place"
  )

occupants_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent less than 1`, bay_occupants_per_room_by_block)
summary(occupants_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent less than 1`, 
##     data = bay_occupants_per_room_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.361  -3.161   0.316   3.671  17.116 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           63.306072   0.842479   75.14   <2e-16 ***
## `percent less than 1`  0.155550   0.008993   17.30   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.295 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.05951,    Adjusted R-squared:  0.05931 
## F-statistic: 299.2 on 1 and 4728 DF,  p-value: < 2.2e-16

Education

bay_education_by_block <- pullCensus("group(B15003)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "education level"), sep = "!!") %>% 
  mutate(`education level` = replace_na(`education level`, "total_educ")) %>% # if the education level field is NA, this corresponded to the total number in that blockgroup
  spread(key = `education level`, value = estimate) %>%
  mutate(`percent associates or higher` = (`Associate's degree` + `Bachelor's degree` + `Doctorate degree` + `Master's degree`)*100/total_educ, `percent less than associates` = 100-`percent associates or higher`) %>% 
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# plotting
bay_education_by_block %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Education"
  )

educ_model <- lm(`% Not Completely at Home` ~ `percent associates or higher`, bay_education_by_block)
summary(educ_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent associates or higher`, 
##     data = bay_education_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.065  -5.606  -0.766   4.826  43.742 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    60.502867   0.338627  178.67   <2e-16 ***
## `percent associates or higher` -0.140169   0.006236  -22.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.608 on 4733 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.09645,    Adjusted R-squared:  0.09626 
## F-statistic: 505.2 on 1 and 4733 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_education_by_block %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Education Pre Shelter-in-Place"
  )

educ_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent associates or higher`, bay_education_by_block)
summary(educ_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent associates or higher`, 
##     data = bay_education_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -31.2636  -3.0081   0.4311   3.4893  15.3499 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    73.135228   0.202927  360.40   <2e-16 ***
## `percent associates or higher`  0.092690   0.003737   24.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.155 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.1151, Adjusted R-squared:  0.1149 
## F-statistic: 615.3 on 1 and 4732 DF,  p-value: < 2.2e-16

High speed internet access

Motivated by this paper https://www.nber.org/papers/w26982.pdf on social distancing, internet access, and inequality, we look at whether a household has “Broadband (high-speed) Internet service such as cable, fiber optic, or DSL service,” and staying at home.

bay_internet_by_block <- pullCensus("group(B28002)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>% 
  filter(is.na(subscription) | (type == "Broadband such as cable, fiber optic or DSL") & is.na(additional)) %>% 
  mutate(type = replace_na(type, "total_num")) %>% 
  dplyr::select(blockgroup, type, estimate) %>%
  spread(key = type, value = estimate) %>%
  mutate(`percent high speed` = `Broadband such as cable, fiber optic or DSL`*100/total_num, `percent no high speed` = 100-`percent high speed`) %>% 
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# plotting
bay_internet_by_block %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with broadband such as cable, fiber optic or DSL",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and High Speed Internet"
  )

internet_model <- lm(`% Not Completely at Home` ~ `percent high speed`, bay_internet_by_block)
summary(internet_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent high speed`, 
##     data = bay_internet_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.509  -5.619  -0.439   5.044  44.047 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          69.134985   0.744851   92.82   <2e-16 ***
## `percent high speed` -0.198027   0.009243  -21.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.611 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.0885, Adjusted R-squared:  0.08831 
## F-statistic: 459.1 on 1 and 4728 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_internet_by_block %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households without broadband such as cable, fiber optic or DSL",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and High Speed Internet Pre Shelter-in-Place"
  )

internet_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent high speed`, bay_internet_by_block)
summary(internet_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent high speed`, 
##     data = bay_internet_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.3893  -3.1107   0.1585   3.5344  20.8729 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          67.53971    0.44725  151.01   <2e-16 ***
## `percent high speed`  0.12937    0.00555   23.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.171 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1031, Adjusted R-squared:  0.1029 
## F-statistic: 543.4 on 1 and 4728 DF,  p-value: < 2.2e-16

Race

bay_race_by_block <- pullCensus("group(B02001)", bay_area_counties) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "race", "specification"), sep = "!!") %>% 
  filter(is.na(specification) & !is.na(race)) %>% 
  dplyr::select(blockgroup, estimate, race) %>%
  spread(key = race, value = estimate) %>% 
  mutate(total_race = `American Indian and Alaska Native alone` + `Asian alone` + `Black or African American alone` + `Native Hawaiian and Other Pacific Islander alone` + `Some other race alone` + `Two or more races` + `White alone`, `% white` = `White alone`*100/total_race, `% Asian` = `Asian alone`*100/total_race, `% black` = `Black or African American alone`*100/total_race) %>%  
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# also get ethnicity data (hispanic/latino vs not)
bay_hisplat_by_block <- pullCensus("group(B03002)", bay_area_counties) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "hisp/lat", "specification"), sep = "!!") %>%
  filter(is.na(specification) & !is.na(`hisp/lat`)) %>% 
  dplyr::select(blockgroup, estimate, `hisp/lat`) %>% 
  spread(key = `hisp/lat`, value = estimate) %>%
  mutate(`% non hispanic/latino` = `Not Hispanic or Latino`*100/(`Hispanic or Latino` + `Not Hispanic or Latino`))

# join with the race data
bay_race_by_block <- bay_race_by_block %>% left_join(bay_hisplat_by_block %>% dplyr::select(blockgroup, `% non hispanic/latino`))

# plotting
# percent white
bay_race_by_block %>% 
  ggplot(aes(
  x = `% white`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and White Residents"
  )

white_model <- lm(`% Not Completely at Home` ~ `% white`, bay_race_by_block)
summary(white_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% white`, data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.209  -5.867  -0.325   5.406  40.720 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 49.017281   0.317038  154.61   <2e-16 ***
## `% white`    0.082034   0.005378   15.25   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.853 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.04684,    Adjusted R-squared:  0.04664 
## F-statistic: 232.6 on 1 and 4734 DF,  p-value: < 2.2e-16
# percent Asian
bay_race_by_block %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Asian Residents"
  )

asian_model <- lm(`% Not Completely at Home` ~ `% Asian`, bay_race_by_block)
summary(asian_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% Asian`, data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.858  -5.282  -0.568   4.575  43.156 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 58.014237   0.178989  324.12   <2e-16 ***
## `% Asian`   -0.193485   0.005685  -34.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.128 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.1966, Adjusted R-squared:  0.1964 
## F-statistic:  1158 on 1 and 4734 DF,  p-value: < 2.2e-16
# percent non hispanic/latino
bay_race_by_block %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Hispanic/Latino Residents"
  )

hisp_model <- lm(`% Not Completely at Home` ~ `% non hispanic/latino`, bay_race_by_block)
summary(hisp_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% non hispanic/latino`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.693  -5.689  -0.546   4.941  41.777 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             62.566651   0.522442  119.76   <2e-16 ***
## `% non hispanic/latino` -0.117579   0.006525  -18.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.772 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.06419,    Adjusted R-squared:  0.06399 
## F-statistic: 324.7 on 1 and 4734 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_race_by_block %>% 
  ggplot(aes(
  x = `% white`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and White Residents Pre Shelter-in-Place"
  )

white_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% white`, bay_race_by_block)
summary(white_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% white`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.636  -3.187   0.365   3.610  14.301 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 74.825617   0.190483  392.82   <2e-16 ***
## `% white`    0.055451   0.003231   17.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.317 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.05859,    Adjusted R-squared:  0.05839 
## F-statistic: 294.5 on 1 and 4732 DF,  p-value: < 2.2e-16
bay_race_by_block %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Asian Residents Pre Shelter-in-Place"
  )

asian_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% Asian`, bay_race_by_block)
summary(asian_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% Asian`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.937  -3.425   0.249   3.755  14.494 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 77.600043   0.120661 643.127   <2e-16 ***
## `% Asian`    0.009013   0.003832   2.352   0.0187 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.477 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.001168,   Adjusted R-squared:  0.0009568 
## F-statistic: 5.533 on 1 and 4732 DF,  p-value: 0.0187
bay_race_by_block %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Hispanic/Latino Residents Pre Shelter-in-Place"
  )

hisp_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% non hispanic/latino`, bay_race_by_block)
summary(hisp_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% non hispanic/latino`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.022  -3.189   0.459   3.605  17.674 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             72.007971   0.314583  228.90   <2e-16 ***
## `% non hispanic/latino`  0.074764   0.003929   19.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.282 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.07109,    Adjusted R-squared:  0.07089 
## F-statistic: 362.1 on 1 and 4732 DF,  p-value: < 2.2e-16

Multiple regression analysis

Multiple regression analysis with income, education, and internet

# multiple regression 
modeltest <- lm(bay_ami_by_block$`% Not Completely at Home` ~ bay_ami_by_block$`% over 125,000` + bay_education_by_block$`percent associates or higher` + bay_internet_by_block$`percent high speed`)
summary(modeltest)
## 
## Call:
## lm(formula = bay_ami_by_block$`% Not Completely at Home` ~ bay_ami_by_block$`% over 125,000` + 
##     bay_education_by_block$`percent associates or higher` + bay_internet_by_block$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.971  -5.549  -0.589   4.635  45.232 
## 
## Coefficients:
##                                                        Estimate Std. Error
## (Intercept)                                           64.502846   0.764507
## bay_ami_by_block$`% over 125,000`                     -0.122709   0.009049
## bay_education_by_block$`percent associates or higher` -0.023364   0.008954
## bay_internet_by_block$`percent high speed`            -0.064243   0.011652
##                                                       t value Pr(>|t|)    
## (Intercept)                                            84.372  < 2e-16 ***
## bay_ami_by_block$`% over 125,000`                     -13.561  < 2e-16 ***
## bay_education_by_block$`percent associates or higher`  -2.609   0.0091 ** 
## bay_internet_by_block$`percent high speed`             -5.514  3.7e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.319 on 4726 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1497, Adjusted R-squared:  0.1492 
## F-statistic: 277.3 on 3 and 4726 DF,  p-value: < 2.2e-16

Changes in staying at home behavior

bay_dem_distancing <- bay_internet_by_block %>% 
  dplyr::select(`percent high speed`, `% Not Completely at Home`, `% Completely at Home`, blockgroup) %>% 
  left_join(bay_education_by_block %>% dplyr::select(blockgroup, `percent associates or higher`)) %>% 
  left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 125,000`)) %>% 
  left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 100,000`)) %>% 
  left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 75,000`)) %>% 
  left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent less than 30`)) %>% 
  left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent elderly`)) %>% 
  left_join(bay_lang_by_block %>% dplyr::select(blockgroup, `% not speaking spanish`)) %>% 
  left_join(bay_lang_by_block %>% dplyr::select(blockgroup, `% speaking english > well`)) %>% 
  left_join(bay_no_vehicles_by_block %>% dplyr::select(blockgroup, `percent with vehicles`)) %>%
  left_join(bay_occupants_per_room_by_block %>% dplyr::select(blockgroup, `percent less than 1`)) %>% 
  left_join(bay_race_by_block %>% dplyr::select(blockgroup, `% white`, `% Asian`, `% non hispanic/latino`))

bay_dem_distancing_pre_post <- bay_dem_distancing %>% 
  left_join(bay_internet_by_block %>% dplyr::select(`% Not Completely at Home Pre Shelter`, `% Completely at Home Pre Shelter`, blockgroup)) %>% 
  mutate(`% increase in staying completely home` = `% Completely at Home` - `% Completely at Home Pre Shelter`, frac_increase = `% increase in staying completely home`/`% Completely at Home Pre Shelter`)

bay_dem_distancing[is.na(bay_dem_distancing)] <- 0
bay_dem_distancing_pre_post[is.na(bay_dem_distancing_pre_post)] <- 0

saveRDS(bay_dem_distancing_pre_post, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/bay_socialdistancing_demdata_prepostdifs_manyvars.rds")

# bay_dem_distancing_pre_post <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/bay_socialdistancing_demdata_prepostdifs_manyvars.rds")

Age

# age
bay_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Young Age Groups"
  )

young_model_dif <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_dif)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.100  -6.430  -0.026   6.949  33.015 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        29.02612    0.54494  53.265
## bay_dem_distancing_pre_post$`percent less than 30` -0.13241    0.01475  -8.975
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 30`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.71 on 4741 degrees of freedom
## Multiple R-squared:  0.01671,    Adjusted R-squared:  0.0165 
## F-statistic: 80.55 on 1 and 4741 DF,  p-value: < 2.2e-16
young_model_frac <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_frac)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2784 -0.4980 -0.1216  0.3772  4.0286 
## 
## Coefficients:
##                                                     Estimate Std. Error t value
## (Intercept)                                         1.595957   0.037886  42.125
## bay_dem_distancing_pre_post$`percent less than 30` -0.010209   0.001026  -9.953
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 30`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7449 on 4741 degrees of freedom
## Multiple R-squared:  0.02047,    Adjusted R-squared:  0.02026 
## F-statistic: 99.07 on 1 and 4741 DF,  p-value: < 2.2e-16
bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Elderly Population"
  )

elderly_model_dif <- lm(`% increase in staying completely home` ~ `percent elderly`, bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent elderly`, 
##     data = bay_dem_distancing_pre_post %>% filter(`percent elderly` < 
##         50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.210  -6.562  -0.043   7.023  33.265 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       22.14817    0.33111  66.892  < 2e-16 ***
## `percent elderly`  0.14894    0.01913   7.784 8.55e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.71 on 4700 degrees of freedom
## Multiple R-squared:  0.01273,    Adjusted R-squared:  0.01252 
## F-statistic:  60.6 on 1 and 4700 DF,  p-value: 8.551e-15
elderly_model_frac <- lm(frac_increase ~ `percent elderly`, bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent elderly`, data = bay_dem_distancing_pre_post %>% 
##     filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0865 -0.5038 -0.1219  0.3770  4.0081 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.013722   0.022954   44.16   <2e-16 ***
## `percent elderly` 0.014829   0.001326   11.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7427 on 4700 degrees of freedom
## Multiple R-squared:  0.02591,    Adjusted R-squared:  0.0257 
## F-statistic:   125 on 1 and 4700 DF,  p-value: < 2.2e-16

Income

# income - less than $75000
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Households Above 50% AMI"
  )

income_75_model_dif <- lm(`% increase in staying completely home` ~ `% over 75,000`, bay_dem_distancing_pre_post)
summary(income_75_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 75,000`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.250  -5.221   0.455   5.961  32.420 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7.953550   0.444147   17.91   <2e-16 ***
## `% over 75,000` 0.271059   0.006991   38.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.414 on 4741 degrees of freedom
## Multiple R-squared:  0.2408, Adjusted R-squared:  0.2406 
## F-statistic:  1503 on 1 and 4741 DF,  p-value: < 2.2e-16
income_75_model_frac <- lm(frac_increase ~ `% over 75,000`, bay_dem_distancing_pre_post)
summary(income_75_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 75,000`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2526 -0.3937 -0.0506  0.3249  3.8100 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0393374  0.0304605   1.291    0.197    
## `% over 75,000` 0.0197722  0.0004794  41.240   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6457 on 4741 degrees of freedom
## Multiple R-squared:  0.264,  Adjusted R-squared:  0.2639 
## F-statistic:  1701 on 1 and 4741 DF,  p-value: < 2.2e-16
# income - less than $100000
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Households Below 80% AMI"
  )

income_100_model_dif <- lm(`% increase in staying completely home` ~ `% over 100,000`, bay_dem_distancing_pre_post)
summary(income_100_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 100,000`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.646  -4.939   0.565   5.858  29.290 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      11.130681   0.342642   32.48   <2e-16 ***
## `% over 100,000`  0.269835   0.006442   41.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.231 on 4741 degrees of freedom
## Multiple R-squared:  0.2701, Adjusted R-squared:  0.2699 
## F-statistic:  1754 on 1 and 4741 DF,  p-value: < 2.2e-16
income_100_model_frac <- lm(frac_increase ~ `% over 100,000`, bay_dem_distancing_pre_post)
summary(income_100_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 100,000`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3549 -0.3751 -0.0358  0.3250  3.7199 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.2605015  0.0233278   11.17   <2e-16 ***
## `% over 100,000` 0.0198993  0.0004386   45.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6285 on 4741 degrees of freedom
## Multiple R-squared:  0.3027, Adjusted R-squared:  0.3026 
## F-statistic:  2058 on 1 and 4741 DF,  p-value: < 2.2e-16
# income - less than $125000
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Households Below $125,000"
  )

income_125_model_dif <- lm(`% increase in staying completely home` ~ `% over 125,000`, bay_dem_distancing_pre_post)
summary(income_125_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 125,000`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.492  -4.758   0.654   5.927  27.999 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      13.351784   0.284488   46.93   <2e-16 ***
## `% over 125,000`  0.280700   0.006432   43.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.126 on 4741 degrees of freedom
## Multiple R-squared:  0.2866, Adjusted R-squared:  0.2865 
## F-statistic:  1905 on 1 and 4741 DF,  p-value: < 2.2e-16
income_125_model_frac <- lm(frac_increase ~ `% over 125,000`, bay_dem_distancing_pre_post)
summary(income_125_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 125,000`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5762 -0.3554 -0.0266  0.3203  3.6897 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.4145126  0.0192182   21.57   <2e-16 ***
## `% over 125,000` 0.0209505  0.0004345   48.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6165 on 4741 degrees of freedom
## Multiple R-squared:  0.3291, Adjusted R-squared:  0.3289 
## F-statistic:  2325 on 1 and 4741 DF,  p-value: < 2.2e-16

Language

# language
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and English Language Ability"
  )

english_ability_model_dif <- lm(`% increase in staying completely home` ~ `% speaking english > well`, bay_dem_distancing_pre_post)
summary(english_ability_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% speaking english > well`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -59.049  -6.490   0.120   6.887  32.881 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  6.05823    1.55805   3.888 0.000102 ***
## `% speaking english > well`  0.19831    0.01682  11.791  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.65 on 4741 degrees of freedom
## Multiple R-squared:  0.02849,    Adjusted R-squared:  0.02829 
## F-statistic:   139 on 1 and 4741 DF,  p-value: < 2.2e-16
english_ability_model_frac <- lm(frac_increase ~ `% speaking english > well`, bay_dem_distancing_pre_post)
summary(english_ability_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% speaking english > well`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1691 -0.4819 -0.0973  0.3788  3.9882 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -0.570920   0.106910   -5.34 9.72e-08 ***
## `% speaking english > well`  0.019586   0.001154   16.97  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7307 on 4741 degrees of freedom
## Multiple R-squared:  0.05728,    Adjusted R-squared:  0.05708 
## F-statistic:   288 on 1 and 4741 DF,  p-value: < 2.2e-16
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Spanish Language Ability"
  )

spanish_speaking_model_dif <- lm(`% increase in staying completely home` ~ `% not speaking spanish`, bay_dem_distancing_pre_post)
summary(spanish_speaking_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% not speaking spanish`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.610  -5.838   0.766   6.640  29.535 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              7.043840   0.711325   9.902   <2e-16 ***
## `% not speaking spanish` 0.206215   0.008297  24.855   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.16 on 4741 degrees of freedom
## Multiple R-squared:  0.1153, Adjusted R-squared:  0.1151 
## F-statistic: 617.8 on 1 and 4741 DF,  p-value: < 2.2e-16
spanish_speaking_model_frac <- lm(frac_increase ~ `% not speaking spanish`, bay_dem_distancing_pre_post)
summary(spanish_speaking_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% not speaking spanish`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1477 -0.4496 -0.0563  0.3612  3.8885 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.0577044  0.0490605  -1.176     0.24    
## `% not speaking spanish`  0.0154081  0.0005722  26.926   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7009 on 4741 degrees of freedom
## Multiple R-squared:  0.1326, Adjusted R-squared:  0.1325 
## F-statistic:   725 on 1 and 4741 DF,  p-value: < 2.2e-16

Occupants per room

# occupants per room
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Room Occupancy"
  )

occupants_model_dif <- lm(`% increase in staying completely home` ~ `percent less than 1`, bay_dem_distancing_pre_post)
summary(occupants_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent less than 1`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.126  -6.426   0.247   6.874  32.852 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.4739     1.4499   0.327    0.744    
## `percent less than 1`   0.2565     0.0155  16.551   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.51 on 4741 degrees of freedom
## Multiple R-squared:  0.05463,    Adjusted R-squared:  0.05443 
## F-statistic:   274 on 1 and 4741 DF,  p-value: < 2.2e-16
occupants_model_frac <- lm(frac_increase ~ `percent less than 1`, bay_dem_distancing_pre_post)
summary(occupants_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent less than 1`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1174 -0.4880 -0.0965  0.3791  3.9044 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.701849   0.099951  -7.022  2.5e-12 ***
## `percent less than 1`  0.020814   0.001068  19.482  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7242 on 4741 degrees of freedom
## Multiple R-squared:  0.07412,    Adjusted R-squared:  0.07393 
## F-statistic: 379.5 on 1 and 4741 DF,  p-value: < 2.2e-16

Vehicle ownership

# vehicles - percent with no vehicles
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Vehicle Availability"
  )

vehicles_model_dif <- lm(`% increase in staying completely home` ~ `percent with vehicles`, bay_dem_distancing_pre_post)
summary(vehicles_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent with vehicles`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.898  -6.598   0.045   7.086  31.302 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              7.93480    1.05527   7.519 6.55e-14 ***
## `percent with vehicles`  0.17964    0.01143  15.711  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.53 on 4741 degrees of freedom
## Multiple R-squared:  0.04949,    Adjusted R-squared:  0.04929 
## F-statistic: 246.8 on 1 and 4741 DF,  p-value: < 2.2e-16
vehicles_model_frac <- lm(frac_increase ~ `percent with vehicles`, bay_dem_distancing_pre_post)
summary(vehicles_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent with vehicles`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0978 -0.4998 -0.1115  0.3742  3.9950 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -0.0837869  0.0728712   -1.15     0.25    
## `percent with vehicles`  0.0144368  0.0007896   18.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7274 on 4741 degrees of freedom
## Multiple R-squared:  0.06587,    Adjusted R-squared:  0.06568 
## F-statistic: 334.3 on 1 and 4741 DF,  p-value: < 2.2e-16

Education

bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Education"
  )

educ_model_dif <- lm(`% increase in staying completely home` ~ `percent associates or higher`, bay_dem_distancing_pre_post)
summary(educ_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent associates or higher`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.324  -5.095   0.804   6.297  26.237 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    12.456294   0.379359   32.84   <2e-16 ***
## `percent associates or higher`  0.235877   0.006992   33.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.703 on 4741 degrees of freedom
## Multiple R-squared:  0.1936, Adjusted R-squared:  0.1934 
## F-statistic:  1138 on 1 and 4741 DF,  p-value: < 2.2e-16
educ_model_frac <- lm(frac_increase ~ `percent associates or higher`, bay_dem_distancing_pre_post)
summary(educ_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent associates or higher`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3485 -0.3817 -0.0352  0.3455  3.5561 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.3380681  0.0258703   13.07   <2e-16 ***
## `percent associates or higher` 0.0177958  0.0004768   37.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6617 on 4741 degrees of freedom
## Multiple R-squared:  0.2271, Adjusted R-squared:  0.2269 
## F-statistic:  1393 on 1 and 4741 DF,  p-value: < 2.2e-16

Internet

bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with broadband such as cable, fiber optic or DSL",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and High Speed Internet"
  )

internet_model_dif <- lm(`% increase in staying completely home` ~ `percent high speed`, bay_dem_distancing_pre_post)
summary(internet_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent high speed`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.594  -5.668   0.360   6.336  36.721 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.7157     0.8052  -2.131   0.0331 *  
## `percent high speed`   0.3289     0.0100  32.872   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.75 on 4741 degrees of freedom
## Multiple R-squared:  0.1856, Adjusted R-squared:  0.1854 
## F-statistic:  1081 on 1 and 4741 DF,  p-value: < 2.2e-16
internet_model_frac <- lm(frac_increase ~ `percent high speed`, bay_dem_distancing_pre_post)
summary(internet_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent high speed`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3879 -0.4325 -0.0922  0.3387  3.7733 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.5262907  0.0564584  -9.322   <2e-16 ***
## `percent high speed`  0.0222260  0.0007015  31.682   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6837 on 4741 degrees of freedom
## Multiple R-squared:  0.1747, Adjusted R-squared:  0.1746 
## F-statistic:  1004 on 1 and 4741 DF,  p-value: < 2.2e-16

Race

# white
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% white`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and White Residents"
  )

white_model_dif <- lm(`% increase in staying completely home` ~ `% white`, bay_dem_distancing_pre_post)
summary(white_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% white`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.389  -6.847  -0.048   7.117  32.003 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.556773   0.384698  66.433  < 2e-16 ***
## `% white`   -0.022638   0.006531  -3.466 0.000533 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.79 on 4741 degrees of freedom
## Multiple R-squared:  0.002528,   Adjusted R-squared:  0.002317 
## F-statistic: 12.01 on 1 and 4741 DF,  p-value: 0.0005325
white_model_frac <- lm(frac_increase ~ `% white`, bay_dem_distancing_pre_post)
summary(white_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% white`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1100 -0.5091 -0.1160  0.3948  4.1101 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0602360  0.0266874  39.728  < 2e-16 ***
## `% white`   0.0032404  0.0004531   7.152 9.85e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7486 on 4741 degrees of freedom
## Multiple R-squared:  0.01067,    Adjusted R-squared:  0.01047 
## F-statistic: 51.15 on 1 and 4741 DF,  p-value: 9.85e-13
# asian
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Asian Residents"
  )

asian_model_dif <- lm(`% increase in staying completely home` ~ `% Asian`, bay_dem_distancing_pre_post)
summary(asian_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% Asian`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.551  -5.998   0.063   6.561  31.500 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.516119   0.218434   89.34   <2e-16 ***
## `% Asian`    0.204167   0.006943   29.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.936 on 4741 degrees of freedom
## Multiple R-squared:  0.1543, Adjusted R-squared:  0.1541 
## F-statistic: 864.8 on 1 and 4741 DF,  p-value: < 2.2e-16
asian_model_frac <- lm(frac_increase ~ `% Asian`, bay_dem_distancing_pre_post)
summary(asian_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% Asian`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3227 -0.4851 -0.1477  0.3732  4.1343 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0174405  0.0160032   63.58   <2e-16 ***
## `% Asian`   0.0091914  0.0005087   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.728 on 4741 degrees of freedom
## Multiple R-squared:  0.06443,    Adjusted R-squared:  0.06424 
## F-statistic: 326.5 on 1 and 4741 DF,  p-value: < 2.2e-16
# hispanic/latino
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Hispanic/Latino Residents"
  )

hisp_model_dif <- lm(`% increase in staying completely home` ~ `% non hispanic/latino`, bay_dem_distancing_pre_post)
summary(hisp_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% non hispanic/latino`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -65.159  -5.758   0.740   6.654  28.741 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             9.146918   0.593446   15.41   <2e-16 ***
## `% non hispanic/latino` 0.195944   0.007417   26.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.09 on 4741 degrees of freedom
## Multiple R-squared:  0.1283, Adjusted R-squared:  0.1281 
## F-statistic: 697.9 on 1 and 4741 DF,  p-value: < 2.2e-16
hisp_model_frac <- lm(frac_increase ~ `% non hispanic/latino`, bay_dem_distancing_pre_post)
summary(hisp_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% non hispanic/latino`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1843 -0.4399 -0.0552  0.3701  3.8168 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.0930276  0.0408371   2.278   0.0228 *  
## `% non hispanic/latino` 0.0147233  0.0005104  28.846   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6942 on 4741 degrees of freedom
## Multiple R-squared:  0.1493, Adjusted R-squared:  0.1491 
## F-statistic: 832.1 on 1 and 4741 DF,  p-value: < 2.2e-16

Multiple regression analysis: income and Spanish language ability

difs_model_inc_span <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
summary(difs_model_inc_span)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.599  -4.764   0.713   5.893  26.968 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                          10.504466   0.645182
## bay_dem_distancing_pre_post$`% over 125,000`          0.260491   0.007621
## bay_dem_distancing_pre_post$`% not speaking spanish`  0.043381   0.008828
##                                                      t value Pr(>|t|)    
## (Intercept)                                           16.281  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`          34.182  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`   4.914 9.22e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.104 on 4740 degrees of freedom
## Multiple R-squared:  0.2902, Adjusted R-squared:  0.2899 
## F-statistic: 969.1 on 2 and 4740 DF,  p-value: < 2.2e-16
frac_model_inc_span <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
summary(frac_model_inc_span)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% not speaking spanish`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5031 -0.3574 -0.0205  0.3212  3.6630 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                          0.2004385  0.0435578
## bay_dem_distancing_pre_post$`% over 125,000`         0.0194311  0.0005145
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.0032616  0.0005960
##                                                      t value Pr(>|t|)    
## (Intercept)                                            4.602 4.30e-06 ***
## bay_dem_distancing_pre_post$`% over 125,000`          37.767  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`   5.473 4.66e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6146 on 4740 degrees of freedom
## Multiple R-squared:  0.3333, Adjusted R-squared:  0.333 
## F-statistic:  1185 on 2 and 4740 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and Spanish language ability

difs_model_inc_span_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_span_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.941  -4.669   0.849   5.911  25.738 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                10.974364   0.652670
## bay_dem_distancing_pre_post$`% over 125,000`                0.237072   0.009274
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.019177   0.010378
## bay_dem_distancing_pre_post$`percent associates or higher`  0.049164   0.011139
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 16.815  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                25.564  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`         1.848   0.0647 .  
## bay_dem_distancing_pre_post$`percent associates or higher`   4.414 1.04e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.086 on 4739 degrees of freedom
## Multiple R-squared:  0.2931, Adjusted R-squared:  0.2927 
## F-statistic: 655.1 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_span_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_span_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5284 -0.3470 -0.0176  0.3248  3.6048 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.2406074  0.0440086
## bay_dem_distancing_pre_post$`% over 125,000`               0.0174292  0.0006253
## bay_dem_distancing_pre_post$`% not speaking spanish`       0.0011925  0.0006998
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0042028  0.0007511
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  5.467 4.80e-08 ***
## bay_dem_distancing_pre_post$`% over 125,000`                27.873  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`         1.704   0.0884 .  
## bay_dem_distancing_pre_post$`percent associates or higher`   5.596 2.32e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6126 on 4739 degrees of freedom
## Multiple R-squared:  0.3376, Adjusted R-squared:  0.3372 
## F-statistic: 805.3 on 3 and 4739 DF,  p-value: < 2.2e-16

The effect of Spanish language speaking vanishes when accounting for both education and income.

Multiple regression analysis: income, English language ability and education

difs_model_inc_eng_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_eng_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -71.556  -4.375   0.911   5.758  25.075 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                24.885888   1.402015
## bay_dem_distancing_pre_post$`% over 125,000`                0.246048   0.009201
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.162318   0.017074
## bay_dem_distancing_pre_post$`percent associates or higher`  0.094982   0.010066
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 17.750   <2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                26.742   <2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`     -9.507   <2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   9.436   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.004 on 4739 degrees of freedom
## Multiple R-squared:  0.3059, Adjusted R-squared:  0.3054 
## F-statistic: 696.1 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_eng_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_eng_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% speaking english > well` + 
##     bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5749 -0.3448 -0.0136  0.3186  3.6055 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.7406258
## bay_dem_distancing_pre_post$`% over 125,000`                0.0177690
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.0055003
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0060631
##                                                            Std. Error t value
## (Intercept)                                                 0.0952022   7.780
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006248  28.440
## bay_dem_distancing_pre_post$`% speaking english > well`     0.0011594  -4.744
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006835   8.870
##                                                            Pr(>|t|)    
## (Intercept)                                                8.87e-15 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`    2.16e-06 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6114 on 4739 degrees of freedom
## Multiple R-squared:  0.3404, Adjusted R-squared:   0.34 
## F-statistic: 815.1 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, English language ability, education, Spanish language ability, and vehicle ownership

difs_model_lots <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_lots)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`% not speaking spanish` + 
##         bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.596  -4.373   0.784   5.597  25.163 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                18.551493   1.472279
## bay_dem_distancing_pre_post$`% over 125,000`                0.206936   0.009617
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.270913   0.019019
## bay_dem_distancing_pre_post$`percent associates or higher`  0.096720   0.011274
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.075109   0.010810
## bay_dem_distancing_pre_post$`percent with vehicles`         0.125811   0.010994
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 12.601  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                21.517  < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`    -14.244  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   8.579  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`         6.948  4.2e-12 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         11.444  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.858 on 4737 degrees of freedom
## Multiple R-squared:  0.3284, Adjusted R-squared:  0.3277 
## F-statistic: 463.3 on 5 and 4737 DF,  p-value: < 2.2e-16
frac_model_lots <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_lots)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% speaking english > well` + 
##     bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7213 -0.3459 -0.0212  0.3139  3.5418 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.3019753
## bay_dem_distancing_pre_post$`% over 125,000`                0.0150253
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.0124690
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0068811
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.0038535
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0090237
##                                                            Std. Error t value
## (Intercept)                                                 0.0999814   3.020
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006531  23.006
## bay_dem_distancing_pre_post$`% speaking english > well`     0.0012916  -9.654
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0007656   8.987
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.0007341   5.249
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0007466  12.087
##                                                            Pr(>|t|)    
## (Intercept)                                                 0.00254 ** 
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`     < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`       1.59e-07 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6016 on 4737 degrees of freedom
## Multiple R-squared:  0.3617, Adjusted R-squared:  0.361 
## F-statistic: 536.8 on 5 and 4737 DF,  p-value: < 2.2e-16

Multiple regression analysis: Hispanic/Latino, income, and education

difs_model_inc_hisp_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_hisp_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% non hispanic/latino` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.957  -4.661   0.802   5.870  25.842 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                10.804737   0.539092
## bay_dem_distancing_pre_post$`% over 125,000`                0.236815   0.009258
## bay_dem_distancing_pre_post$`% non hispanic/latino`         0.028591   0.009812
## bay_dem_distancing_pre_post$`percent associates or higher`  0.040655   0.011558
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 20.042  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                25.578  < 2e-16 ***
## bay_dem_distancing_pre_post$`% non hispanic/latino`          2.914  0.00359 ** 
## bay_dem_distancing_pre_post$`percent associates or higher`   3.517  0.00044 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.081 on 4739 degrees of freedom
## Multiple R-squared:  0.2939, Adjusted R-squared:  0.2934 
## F-statistic: 657.5 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_hisp_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_hisp_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5111 -0.3460 -0.0157  0.3236  3.6078 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.2185664  0.0363438
## bay_dem_distancing_pre_post$`% over 125,000`               0.0173985  0.0006242
## bay_dem_distancing_pre_post$`% non hispanic/latino`        0.0020562  0.0006615
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0034850  0.0007792
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  6.014 1.95e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000`                27.875  < 2e-16 ***
## bay_dem_distancing_pre_post$`% non hispanic/latino`          3.108  0.00189 ** 
## bay_dem_distancing_pre_post$`percent associates or higher`   4.472 7.92e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6122 on 4739 degrees of freedom
## Multiple R-squared:  0.3386, Adjusted R-squared:  0.3382 
## F-statistic: 808.7 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and white residents

difs_model_inc_white_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_white_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% white` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.577  -4.346   0.865   5.637  25.566 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                15.481337   0.395815
## bay_dem_distancing_pre_post$`% over 125,000`                0.244686   0.008961
## bay_dem_distancing_pre_post$`% white`                      -0.100580   0.005614
## bay_dem_distancing_pre_post$`percent associates or higher`  0.093119   0.009340
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  39.11   <2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                 27.31   <2e-16 ***
## bay_dem_distancing_pre_post$`% white`                       -17.91   <2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`    9.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.796 on 4739 degrees of freedom
## Multiple R-squared:  0.3375, Adjusted R-squared:  0.3371 
## F-statistic: 804.7 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_white_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_white_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5993 -0.3464 -0.0221  0.3214  3.5394 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.3755433
## bay_dem_distancing_pre_post$`% over 125,000`                0.0176384
## bay_dem_distancing_pre_post$`% white`                      -0.0020731
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0055609
##                                                            Std. Error t value
## (Intercept)                                                 0.0274949  13.659
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006225  28.337
## bay_dem_distancing_pre_post$`% white`                       0.0003900  -5.316
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006488   8.572
##                                                            Pr(>|t|)    
## (Intercept)                                                 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% white`                      1.11e-07 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.611 on 4739 degrees of freedom
## Multiple R-squared:  0.3412, Adjusted R-squared:  0.3408 
## F-statistic:   818 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and Asian residents

difs_model_inc_asian_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_asian_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.773  -4.319   0.846   5.470  25.900 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                10.171629   0.344844
## bay_dem_distancing_pre_post$`% over 125,000`                0.216081   0.008794
## bay_dem_distancing_pre_post$`% Asian`                       0.146978   0.006176
## bay_dem_distancing_pre_post$`percent associates or higher`  0.044416   0.008965
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 29.496  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                24.571  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       23.798  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   4.954 7.51e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.59 on 4739 degrees of freedom
## Multiple R-squared:  0.3681, Adjusted R-squared:  0.3677 
## F-statistic: 920.4 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_asian_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5673 -0.3471 -0.0238  0.3108  3.4445 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.2473638  0.0243231
## bay_dem_distancing_pre_post$`% over 125,000`               0.0168190  0.0006203
## bay_dem_distancing_pre_post$`% Asian`                      0.0045477  0.0004356
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0043957  0.0006323
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 10.170  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                27.115  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       10.439  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   6.951 4.11e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6059 on 4739 degrees of freedom
## Multiple R-squared:  0.3521, Adjusted R-squared:  0.3517 
## F-statistic: 858.6 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and English language ability

difs_model_inc_asian_educ_eng <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% speaking english > well`)
summary(difs_model_inc_asian_educ_eng)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`% speaking english > well`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.380  -4.339   0.806   5.473  25.909 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                8.980148   1.526570
## bay_dem_distancing_pre_post$`% over 125,000`               0.215010   0.008895
## bay_dem_distancing_pre_post$`% Asian`                      0.149463   0.006911
## bay_dem_distancing_pre_post$`percent associates or higher` 0.041008   0.009923
## bay_dem_distancing_pre_post$`% speaking english > well`    0.014606   0.018230
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  5.883 4.32e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000`                24.171  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       21.625  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   4.132 3.65e-05 ***
## bay_dem_distancing_pre_post$`% speaking english > well`      0.801    0.423    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.591 on 4738 degrees of freedom
## Multiple R-squared:  0.3682, Adjusted R-squared:  0.3677 
## F-statistic: 690.4 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_eng <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% speaking english > well`)
summary(frac_model_inc_asian_educ_eng)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`% speaking english > well`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5678 -0.3470 -0.0234  0.3107  3.4454 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.2593220
## bay_dem_distancing_pre_post$`% over 125,000`                0.0168298
## bay_dem_distancing_pre_post$`% Asian`                       0.0045227
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0044299
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.0001466
##                                                            Std. Error t value
## (Intercept)                                                 0.1076818   2.408
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006275  26.822
## bay_dem_distancing_pre_post$`% Asian`                       0.0004875   9.277
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0007000   6.329
## bay_dem_distancing_pre_post$`% speaking english > well`     0.0012859  -0.114
##                                                            Pr(>|t|)    
## (Intercept)                                                  0.0161 *  
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  2.7e-10 ***
## bay_dem_distancing_pre_post$`% speaking english > well`      0.9092    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.606 on 4738 degrees of freedom
## Multiple R-squared:  0.3521, Adjusted R-squared:  0.3516 
## F-statistic: 643.8 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and high speed internet access

difs_model_inc_asian_educ_internet <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_asian_educ_internet)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.113  -4.243   0.765   5.392  25.016 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                4.920983   0.744977
## bay_dem_distancing_pre_post$`% over 125,000`               0.191355   0.009276
## bay_dem_distancing_pre_post$`% Asian`                      0.142700   0.006160
## bay_dem_distancing_pre_post$`percent associates or higher` 0.026552   0.009187
## bay_dem_distancing_pre_post$`percent high speed`           0.091126   0.011481
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  6.606 4.40e-11 ***
## bay_dem_distancing_pre_post$`% over 125,000`                20.629  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       23.167  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   2.890  0.00387 ** 
## bay_dem_distancing_pre_post$`percent high speed`             7.937 2.56e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.535 on 4738 degrees of freedom
## Multiple R-squared:  0.3764, Adjusted R-squared:  0.3759 
## F-statistic: 715.1 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_internet <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_asian_educ_internet)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4813 -0.3503 -0.0241  0.3029  3.4331 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.0320957  0.0527768
## bay_dem_distancing_pre_post$`% over 125,000`               0.0158053  0.0006571
## bay_dem_distancing_pre_post$`% Asian`                      0.0043723  0.0004364
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0036633  0.0006508
## bay_dem_distancing_pre_post$`percent high speed`           0.0037360  0.0008133
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  0.608    0.543    
## bay_dem_distancing_pre_post$`% over 125,000`                24.052  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       10.019  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   5.629 1.92e-08 ***
## bay_dem_distancing_pre_post$`percent high speed`             4.593 4.47e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6046 on 4738 degrees of freedom
## Multiple R-squared:  0.355,  Adjusted R-squared:  0.3545 
## F-statistic:   652 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and vehicle ownership

difs_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.211  -4.317   0.752   5.442  26.869 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                1.966897   0.937917
## bay_dem_distancing_pre_post$`% over 125,000`               0.186136   0.009279
## bay_dem_distancing_pre_post$`% Asian`                      0.152654   0.006150
## bay_dem_distancing_pre_post$`percent associates or higher` 0.059307   0.009024
## bay_dem_distancing_pre_post$`percent with vehicles`        0.093000   0.009901
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  2.097    0.036 *  
## bay_dem_distancing_pre_post$`% over 125,000`                20.060  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       24.823  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   6.572  5.5e-11 ***
## bay_dem_distancing_pre_post$`percent with vehicles`          9.393  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.512 on 4738 degrees of freedom
## Multiple R-squared:  0.3797, Adjusted R-squared:  0.3792 
## F-statistic:   725 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5398 -0.3487 -0.0304  0.3030  3.4025 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.3860752
## bay_dem_distancing_pre_post$`% over 125,000`                0.0145071
## bay_dem_distancing_pre_post$`% Asian`                       0.0049859
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0055453
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0071800
##                                                            Std. Error t value
## (Intercept)                                                 0.0660327  -5.847
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006533  22.207
## bay_dem_distancing_pre_post$`% Asian`                       0.0004330  11.516
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006353   8.728
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0006970  10.301
##                                                            Pr(>|t|)    
## (Intercept)                                                5.35e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5993 on 4738 degrees of freedom
## Multiple R-squared:  0.3663, Adjusted R-squared:  0.3658 
## F-statistic: 684.8 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, high speed internet access, and vehicle ownership

difs_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent with vehicles` + 
##         bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.700  -4.286   0.735   5.447  27.725 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                0.374573   0.996725
## bay_dem_distancing_pre_post$`% over 125,000`               0.176801   0.009475
## bay_dem_distancing_pre_post$`% Asian`                      0.148745   0.006194
## bay_dem_distancing_pre_post$`percent associates or higher` 0.044844   0.009528
## bay_dem_distancing_pre_post$`percent with vehicles`        0.073349   0.010747
## bay_dem_distancing_pre_post$`percent high speed`           0.057724   0.012430
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  0.376    0.707    
## bay_dem_distancing_pre_post$`% over 125,000`                18.660  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       24.014  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   4.707 2.59e-06 ***
## bay_dem_distancing_pre_post$`percent with vehicles`          6.825 9.91e-12 ***
## bay_dem_distancing_pre_post$`percent high speed`             4.644 3.51e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.494 on 4737 degrees of freedom
## Multiple R-squared:  0.3825, Adjusted R-squared:  0.3819 
## F-statistic: 586.9 on 5 and 4737 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5278 -0.3496 -0.0302  0.3040  3.4030 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.4012985
## bay_dem_distancing_pre_post$`% over 125,000`                0.0144179
## bay_dem_distancing_pre_post$`% Asian`                       0.0049485
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0054070
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0069921
## bay_dem_distancing_pre_post$`percent high speed`            0.0005519
##                                                            Std. Error t value
## (Intercept)                                                 0.0703295  -5.706
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006685  21.566
## bay_dem_distancing_pre_post$`% Asian`                       0.0004371  11.322
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006723   8.043
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0007583   9.220
## bay_dem_distancing_pre_post$`percent high speed`            0.0008771   0.629
##                                                            Pr(>|t|)    
## (Intercept)                                                1.23e-08 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 1.10e-15 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         < 2e-16 ***
## bay_dem_distancing_pre_post$`percent high speed`              0.529    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5993 on 4737 degrees of freedom
## Multiple R-squared:  0.3664, Adjusted R-squared:  0.3657 
## F-statistic: 547.8 on 5 and 4737 DF,  p-value: < 2.2e-16

This model seems to capture the most variation, though it is only an improvement of about 1% of the variation predicted than the previous one with solely income, education, and Asian residents.